{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Bio import PDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Structure exists: './pdb1tup.ent' \n",
      "Downloading PDB structure '1OLG'...\n",
      "Downloading PDB structure '1YCQ'...\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'./pdb1ycq.ent'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "repository = PDB.PDBList()\n",
    "repository.retrieve_pdb_file('1TUP', pdir='.', file_format='pdb')\n",
    "repository.retrieve_pdb_file('1OLG', pdir='.', file_format='pdb')\n",
    "repository.retrieve_pdb_file('1YCQ', pdir='.', file_format='pdb')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 1125.\n",
      "  PDBConstructionWarning)\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:90: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 1160.\n",
      "  PDBConstructionWarning)\n"
     ]
    }
   ],
   "source": [
    "parser = PDB.PDBParser()\n",
    "p53_1tup = parser.get_structure('P 53 - DNA Binding', 'pdb1tup.ent')\n",
    "p53_1olg = parser.get_structure('P 53 - Tetramerization', 'pdb1olg.ent')\n",
    "p53_1ycq = parser.get_structure('P 53 - Transactivation', 'pdb1ycq.ent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                name:  tumor suppressor p53 complexed with dna\n",
      "                head: antitumor protein/dna\n",
      "     deposition_date: 1995-07-11\n",
      "        release_date: 1995-07-11\n",
      "    structure_method: x-ray diffraction\n",
      "          resolution: 2.2\n",
      " structure_reference:\n",
      "                   -> n.p.pavletich,k.a.chambers,c.o.pabo the dna-binding domain of p53 contains the four conserved regions and the major mutation hot spots genes dev. v. 7 2556 1993 issn 0890-9369 \n",
      "                   -> b.vogelstein,k.w.kinzler p53 function and dysfunction cell(cambridge,mass.) v. 70 523 1992 issn 0092-8674 \n",
      "   journal_reference: y.cho,s.gorina,p.d.jeffrey,n.p.pavletich crystal structure of a p53 tumor suppressor-dna complex: understanding tumorigenic mutations. science v. 265 346 1994 issn 0036-8075 8023157 \n",
      "              author: Y.Cho,S.Gorina,P.D.Jeffrey,N.P.Pavletich\n",
      "\n",
      "            compound:\n",
      "\n",
      "                       1:\n",
      "                        misc: \n",
      "                    molecule: dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') \n",
      "                       chain: e\n",
      "                  engineered: yes\n",
      "\n",
      "\n",
      "                       2:\n",
      "                        misc: \n",
      "                    molecule: dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') \n",
      "                       chain: f\n",
      "                  engineered: yes\n",
      "\n",
      "\n",
      "                       3:\n",
      "                        misc: \n",
      "                    molecule: protein (p53 tumor suppressor )\n",
      "                       chain: a, b, c\n",
      "                  engineered: yes\n",
      "\n",
      "\n",
      "\n",
      "              source:\n",
      "\n",
      "                       1:\n",
      "                        misc: \n",
      "                   synthetic: yes\n",
      "\n",
      "\n",
      "                       2:\n",
      "                        misc: \n",
      "                   synthetic: yes\n",
      "\n",
      "\n",
      "                       3:\n",
      "                        misc: \n",
      "         organism_scientific: homo sapiens\n",
      "             organism_common: human\n",
      "              organism_taxid: 9606\n",
      "                   cell_line: a431\n",
      "                        cell: human vulva carcinoma\n",
      "           expression_system: escherichia coli\n",
      "        expression_system_taxid: 562\n",
      "        expression_system_plasmid: pet3d\n",
      "\n",
      "\n",
      "            keywords: antigen p53, antitumor protein/dna complex\n",
      "             journal: AUTH   Y.CHO,S.GORINA,P.D.JEFFREY,N.P.PAVLETICHTITL   CRYSTAL STRUCTURE OF A P53 TUMOR SUPPRESSOR-DNATITL 2 COMPLEX: UNDERSTANDING TUMORIGENIC MUTATIONS.REF    SCIENCE                       V. 265   346 1994REFN                   ISSN 0036-8075PMID   8023157\n"
     ]
    }
   ],
   "source": [
    "def print_pdb_headers(headers, indent=0):\n",
    "    ind_text = ' ' * indent\n",
    "    for header, content in headers.items():\n",
    "        if type(content) == dict:\n",
    "            print('\\n%s%20s:' % (ind_text, header))\n",
    "            print_pdb_headers(content, indent + 4)\n",
    "            print()\n",
    "        elif type(content) == list:\n",
    "            print('%s%20s:' % (ind_text, header))\n",
    "            for elem in content:\n",
    "                print('%s%21s %s' % (ind_text, '->', elem))\n",
    "        else:\n",
    "            print('%s%20s: %s' % (ind_text, header, content))\n",
    "\n",
    "print_pdb_headers(p53_1tup.header)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'1': {'misc': '', 'molecule': \"dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') \", 'chain': 'e', 'engineered': 'yes'}, '2': {'misc': '', 'molecule': \"dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') \", 'chain': 'f', 'engineered': 'yes'}, '3': {'misc': '', 'molecule': 'protein (p53 tumor suppressor )', 'chain': 'a, b, c', 'engineered': 'yes'}}\n",
      "{'1': {'misc': '', 'molecule': 'tumor suppressor p53 (oligomerization domain)', 'chain': 'a, b, c, d', 'engineered': 'yes'}}\n",
      "{'1': {'misc': '', 'molecule': 'mdm2', 'chain': 'a', 'synonym': 'mdm2', 'engineered': 'yes'}, '2': {'misc': '', 'molecule': 'p53', 'chain': 'b', 'fragment': 'residues 13 - 29', 'engineered': 'yes'}}\n"
     ]
    }
   ],
   "source": [
    "print(p53_1tup.header['compound'])\n",
    "print(p53_1olg.header['compound'])\n",
    "print(p53_1ycq.header['compound'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "1TUP - Chain: E. Number of residues: 43. Number of atoms: 442.\n",
      "1TUP - Chain: F. Number of residues: 35. Number of atoms: 449.\n",
      "1TUP - Chain: A. Number of residues: 395. Number of atoms: 1734.\n",
      "1TUP - Chain: B. Number of residues: 265. Number of atoms: 1593.\n",
      "1TUP - Chain: C. Number of residues: 276. Number of atoms: 1610.\n",
      "\n",
      "1OLG - Chain: A. Number of residues: 42. Number of atoms: 698.\n",
      "1OLG - Chain: B. Number of residues: 42. Number of atoms: 698.\n",
      "1OLG - Chain: C. Number of residues: 42. Number of atoms: 698.\n",
      "1OLG - Chain: D. Number of residues: 42. Number of atoms: 698.\n",
      "\n",
      "1YCQ - Chain: A. Number of residues: 123. Number of atoms: 741.\n",
      "1YCQ - Chain: B. Number of residues: 16. Number of atoms: 100.\n"
     ]
    }
   ],
   "source": [
    "def describe_model(name, pdb):\n",
    "    print()\n",
    "    for model in pdb:\n",
    "        for chain in model:\n",
    "            print('%s - Chain: %s. Number of residues: %d. Number of atoms: %d.' %\n",
    "                  (name, chain.id, len(chain), len(list(chain.get_atoms()))))\n",
    "describe_model('1TUP', p53_1tup)\n",
    "describe_model('1OLG', p53_1olg)\n",
    "describe_model('1YCQ', p53_1ycq)\n",
    "#will go deep in a next recipe (bottom up)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('H_ ZN', 951, ' ')\n",
      "('H_ ZN', 952, ' ')\n",
      "('H_ ZN', 953, ' ')\n"
     ]
    }
   ],
   "source": [
    "for residue in p53_1tup.get_residues():\n",
    "    if residue.id[0] in [' ', 'W']:\n",
    "        continue\n",
    "    print(residue.id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<Residue SER het=  resseq=94 icode= >\n",
      "<Atom N> 858 N\n",
      "<Atom CA> 859 C\n",
      "<Atom C> 860 C\n",
      "<Atom O> 861 O\n",
      "<Atom CB> 862 C\n",
      "<Atom OG> 863 O\n",
      "<Atom CA>\n"
     ]
    }
   ],
   "source": [
    "res = next(p53_1tup[0]['A'].get_residues())\n",
    "print(res)\n",
    "for atom in res:\n",
    "    print(atom, atom.serial_number, atom.element)\n",
    "print(p53_1tup[0]['A'][94]['CA'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1TUP:B SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNT 219\n",
      "1OLG:B KKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG 42\n",
      "1YCQ:B PLSQETFSDLWKLLPEN 17\n"
     ]
    }
   ],
   "source": [
    "from Bio.SeqIO import PdbIO, FastaIO\n",
    "\n",
    "def get_fasta(pdb_file, fasta_file, transfer_ids=None):\n",
    "    fasta_writer = FastaIO.FastaWriter(fasta_file)\n",
    "    fasta_writer.write_header()\n",
    "    for rec in PdbIO.PdbSeqresIterator(pdb_file):\n",
    "        if len(rec.seq) == 0:\n",
    "            continue\n",
    "        if transfer_ids is not None and rec.id not in transfer_ids:\n",
    "            continue\n",
    "        print(rec.id, rec.seq, len(rec.seq))\n",
    "        fasta_writer.write_record(rec)\n",
    "        \n",
    "get_fasta(open('pdb1tup.ent'), open('1tup.fasta', 'w'), transfer_ids=['1TUP:B'])\n",
    "get_fasta(open('pdb1olg.ent'), open('1olg.fasta', 'w'), transfer_ids=['1OLG:B'])\n",
    "get_fasta(open('pdb1ycq.ent'), open('1ycq.fasta', 'w'), transfer_ids=['1YCQ:B'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
